#Libraries to be used
library(tidyverse)
library(survival)
library(rstudioapi)
library(countrycode)
library(broom)
library(psych)

#Set working directory
fileloc <- getSourceEditorContext()$path %>%
  str_remove("ForestCarbonGeographyReplication.R")
setwd(fileloc)

#Read in data
hexes <- readRDS("ForestCarbonHexDatasetFINAL.rds")

#Final variable preparation for modeling
hexes$FH <- (hexes$PR + hexes$CL)/2
hexes$LandDealsSqKM <- hexes$LandDeals/hexes$HexAreaSqKM
hexes$ProtPct[hexes$ProtPct>100] <- 100
hexes$Continent <- countrycode(hexes$ISO_A3, origin="iso3c", destination="continent")

#Subset to modeling variables
hexes_scale <- hexes[,c("yearstart", "year", "proj_start",
                        "Continent", "Crop", "HFoot", "GDP", "ForPctCh", "GPP", "FH",
                        "ZonalRichn", "LandDealsSqKM", "EnvDefPMil", "ForSqKM",
                        "ForPct", "ProtPct", "AgPctGDP")]

#Create Table A1 - Data summary
hexes_sum <- describe(hexes_scale[complete.cases(hexes_scale),])

write.csv(hexes_sum, "TableA1.csv")

#Scale variables by standard deviations
for(i in 5:ncol(hexes_scale)){
  hexes_scale[,i] <- base::scale(hexes_scale[,i])
}

#Model with all countries
mod.base <- coxph(Surv(time = yearstart, time2 = year, event = proj_start) ~ 
                   HFoot + Crop + ForPctCh + GPP + ZonalRichn + LandDealsSqKM + EnvDefPMil + ForPct + ProtPct +
                   FH + GDP + AgPctGDP + frailty(Continent), data = hexes_scale)
mod.base.ty <- tidy(mod.base, conf.int=0.99)
mod.base.sum <- summary(mod.base, conf.int=0.99)


#Create dataset with only FCPF and UNREDD countries
inst <- read.csv("UNREDDFCPF.csv", stringsAsFactors = FALSE) %>%
  mutate(institution = (UNREDD + FCPF) > 0)

hexes <- hexes %>%
  left_join(inst)

hexes_scale <- hexes[hexes$institution, c("yearstart", "year", "proj_start",
                        "Continent", "Crop", "HFoot", "GDP", "GPP", "ForPct", "ForPctCh", "ProtPct",
                        "ZonalRichn", "LandDealsSqKM", "EnvDefPMil", "ForSqKM", "FH",
                        "AgPctGDP")]

#Create Table A2 - Data summary
hexes_inst_sum <- describe(hexes_scale[complete.cases(hexes_scale),])

write.csv(hexes_inst_sum, "TableA2.csv")

#Scale to standard deviations for modeling
for(i in 5:ncol(hexes_scale)){
  hexes_scale[,i] <- scale(hexes_scale[,i])
}

#Model with UNREDD/FCPF countries only
mod.inst <- coxph(Surv(time = yearstart, time2 = year, event = proj_start) ~ 
                    HFoot + Crop + ForPctCh + GPP + ZonalRichn + LandDealsSqKM + EnvDefPMil + ForPct + ProtPct +
                    FH + GDP + AgPctGDP + frailty(Continent), data = hexes_scale)
mod.inst.ty <- tidy(mod.inst, conf.int=0.99)
mod.inst.sum <- summary(mod.inst, conf.int=0.99)

#Create dataset with only the US
hexes_scale <- hexes[hexes$ISO_A3=="USA", c("yearstart", "year", "proj_start", 
                                          "ISO_A3", "Crop", "ForPctCh", "HFoot", "GDP", "GPP", "ForPct", "ProtPct",
                                          "ZonalRichn", "LandDealsSqKM", "EnvDef", "ForSqKM")]

#Create Table A3 - Data summary
hexes_us_sum <- describe(hexes_scale[complete.cases(hexes_scale),])

write.csv(hexes_us_sum, "TableA3.csv")

#Convert variables to standard deviations for modeling
for(i in 5:ncol(hexes_scale)){
  hexes_scale[,i] <- scale(hexes_scale[,i])
}

#Model with US only
mod.us <- coxph(Surv(time = yearstart, time2 = year, event = proj_start) ~ 
                  HFoot + Crop + ForPctCh + GPP + ZonalRichn  + ForPct + ProtPct +
                  GDP, data = hexes_scale)
mod.us.ty <- tidy(mod.us, conf.int=0.99)
mod.us.sum <- summary(mod.us, conf.int=0.99)

#Plot model results - Figure 2
mod.base.ty <- mod.base.ty %>%
  mutate(Model = "All Observations")

mod.inst.ty <- mod.inst.ty %>%
  mutate(Model = "UNREDD/FCPF Countries Only")

mod.us.ty <- mod.us.ty %>%
  mutate(Model = "United States Only")

mod.plot <- rbind(mod.base.ty, mod.inst.ty, mod.us.ty)

labels <- c("H2: Human Footprint", "H2: Cropland (%)", "H2: Forest Change in Past 5 Years (%)", "H1: Gross Primary Productivity",
            "H3: IUCN Species Richness", "H2: Cumulative Land Deals/Sq. KM", "H5: Murders of Env. Defenders per 1 Mil.",
            "C: Forest Area (%)", "H3: WDPA Protected Area (%)", "H4: Freedom House Score", 
            "C: Local GDP", "C: Primary Value Added as % of GDP", NA)
vars <- mod.plot$term[1:13]

mod.plot <- mod.plot %>%
  mutate(var = labels[match(term, vars)],
         sig = (conf.low * conf.high) > 0) %>%
  filter(!is.na(var)) %>%
  mutate(var = factor(var, levels=c("C: Primary Value Added as % of GDP", "C: Local GDP",
                                    "C: Forest Area (%)", "H5: Murders of Env. Defenders per 1 Mil.",
                                    "H4: Freedom House Score", "H3: WDPA Protected Area (%)", "H3: IUCN Species Richness",
                                    "H2: Forest Change in Past 5 Years (%)",
                                    "H2: Cumulative Land Deals/Sq. KM", "H2: Cropland (%)",
                                    "H2: Human Footprint", "H1: Gross Primary Productivity"),
                      ordered = TRUE))

g <- ggplot(data=mod.plot, aes(x = var, y = exp(estimate))) +
  geom_hline(yintercept = 1, linetype = 2) +
  geom_errorbar(aes(ymin = exp(conf.low), ymax = exp(conf.high)))+
  geom_point(size=4, position = position_dodge(width = 0.5))+
  geom_point(data=mod.plot[mod.plot$sig,], color="white", size=2) +
  scale_x_discrete("Independent Variable")+
  scale_y_continuous("Estimated Multiplicative Change in Odds of Project Establishment\nwith a One Standard Deviation Change in Independent Variable")+
  facet_wrap(~Model, ncol=3, scales = "free_x") +
  coord_flip()+
  theme(
    legend.title=element_text(size=14, face="bold"),
    plot.title=element_text(size=14, face="bold"),
    axis.title=element_text(size=14, face="bold"),
    axis.text=element_text(size=12),
    axis.text.x = element_text(angle = 315, vjust = 1, hjust=0),
    legend.text=element_text(size=12),
    strip.text = element_text(size=14, face="bold"),
    legend.position="bottom"
  )
png("Figure2.png", width=14, height=7, units = "in", res=300)
g
dev.off()

#Plot basic information on projects - part of Figure 1
projs.plot <- read.csv("ForestPreservationDataset05292023.csv", stringsAsFactors = FALSE)

projs.plot$starting_year[projs.plot$starting_year==9999] <- NA

fig1 <- projs.plot %>%
  filter(!is.na(starting_year)) %>%
  group_by(starting_year) %>%
  count() %>%
  ungroup() %>%
  mutate(cumulative = cumsum(n)) %>%
  ggplot(aes(x=starting_year, y=cumulative)) +
  geom_area(fill="black")+
  scale_x_continuous("") +
  scale_y_continuous("Cumulative Number of\nProjects Established") +
  theme(
    legend.title=element_text(size=14, face="bold"),
    plot.title=element_text(size=14, face="bold"),
    axis.title=element_text(size=32, face="bold"),
    axis.text=element_text(size=28),
    axis.text.x = element_text(angle = 315, vjust = 1, hjust=0),
    legend.text=element_text(size=12),
    strip.text = element_text(size=14, face="bold"),
    legend.position="bottom"
  )
fig1